#### Measuring support for liberal democracy
#### Code for revised draft of paper
#### Version 5 - February 2024


library(tidyverse)
library(dplyr)
library(psych)
library(lavaan)
library(car)
library(semTable)
library(psychTools)
library(tth)
library(haven)
library(RColorBrewer)
library(matrixStats)
library(pewmethods)


# load the survey responses from the working directory
sd_dat <- read.csv(file = "peru_dat.csv")

# inspect variable names and overall summaries
names(sd_dat)
summary(sd_dat)
# per-variable frequency tables (NAs shown when present), to check whether
# DKs and NAs are included and how they are coded
sapply(sd_dat, table, useNA = "ifany")


## Edit sup dem questions

# take columns 8 to 24 of the raw data (17 items) and give them short item names
sd17 <- setNames(
  sd_dat[, 8:24],
  c(
    "FREXP1", "FREXP2",
    "FRASSC1", "FRASSC2", "FRASSC3",
    "UNISUFF1", "UNISUFF2",
    "DECELEC1", "DECELEC2",
    "FRELECT1", "FRELECT2",
    "JUDCNSTR1", "JUDCNSTR2",
    "LEGCNSTR1", "LEGCNSTR2",
    "EQLAW1", "EQLAW2"
  )
)


## Plot item distributions

# bar labels and colours: five Likert categories plus a grey bar for DK
lik5_labs <- c("StrAg", "SomeAg", "Neither", "SomeDis", "StrDis", "DK")
lik5 <- c(brewer.pal(5, "BrBG"), "#b4b1b1")

# plotted before any recoding, so the bars show the raw response codes
sd_plot <- sd17

# one small bar chart per item on a 5 x 4 grid, written to a single PDF page
pdf("supdem_distr_peru.pdf", height = 8, width = 6)
par(
  mfrow = c(5, 4), mar = c(3, 2, 0.5, 0.5),
  tcl = -0.2, cex = 0.9, las = 2, mgp = c(1.8, 0.9, 0)
)
for (item in names(sd_plot)) {
  # percentage of all rows falling in each response category
  pct <- table(sd_plot[[item]]) / nrow(sd_plot) * 100
  barplot(
    height = pct, names.arg = lik5_labs, col = lik5,
    axes = FALSE, cex.names = 0.7, mgp = c(1, 0.2, 0), ylim = c(0, 80)
  )
  axis(side = 2, labels = TRUE, cex.axis = 0.7, mgp = c(1, 0.4, 0))
  # item name inside the panel, left-aligned near the top
  text(x = 1.5, y = 75, labels = item, cex = 0.8, adj = 0)
}
dev.off()

## Recode so that every item points in the pro-democratic direction, with DKs
## treated as "neither".
## Change 99 to whatever code is used for DKs; if there is none, delete "99=3".
## The recodes assume the response set runs from 1=strongly agree to
## 5=strongly disagree; change as appropriate.
dem_vnam <- c(
  "FREXP1", "FRASSC2", "UNISUFF2", "DECELEC2",
  "FRELECT1", "JUDCNSTR1", "LEGCNSTR2", "EQLAW2"
)
aut_vnam <- c(
  "FREXP2", "FRASSC1", "FRASSC3", "UNISUFF1", "DECELEC1",
  "FRELECT2", "JUDCNSTR2", "LEGCNSTR1", "EQLAW1"
)
sd_dat_r <- sd17

# dem_vnam items: reverse the scale so that agreement gets the high score
sd_dat_r[dem_vnam] <- lapply(
  sd_dat_r[dem_vnam], car::Recode,
  recodes = "1=5; 2=4; 3=3; 4=2; 5=1; 99=3; else=NA"
)
# aut_vnam items: keep the direction, only map DK to 3 and anything else to NA
sd_dat_r[aut_vnam] <- lapply(
  sd_dat_r[aut_vnam], car::Recode,
  recodes = "1=1; 2=2; 3=3; 4=4; 5=5; 99=3; else=NA"
)

# copy the recoded items into sd_dat under their short names
sd_dat[dem_vnam] <- sd_dat_r[dem_vnam]
sd_dat[aut_vnam] <- sd_dat_r[aut_vnam]

# rebuild the 17-item data frame from the recoded columns
sd17 <- sd_dat[, c(
  "FREXP1", "FREXP2",
  "FRASSC1", "FRASSC2", "FRASSC3",
  "UNISUFF1", "UNISUFF2",
  "DECELEC1", "DECELEC2",
  "FRELECT1", "FRELECT2",
  "JUDCNSTR1", "JUDCNSTR2",
  "LEGCNSTR1", "LEGCNSTR2",
  "EQLAW1", "EQLAW2"
)]


## create weights

# recode demogs
# Age bands: 1 = under 25, 2 = 25-34, 3 = 35-44, 4 = 45-54, 5 = 55-64, 6 = 65+.
# The bands are cut on the age column itself; the previous nested ifelse()
# compared the whole data frame (sd_dat > 24) instead of sd_dat$age, so the
# categories were not derived from age. Missing ages stay NA.
sd_dat$age_cat <- findInterval(sd_dat$age, c(25, 35, 45, 55, 65)) + 1
# gender: factor level codes in alphabetical order; any level after the first
# two is set to NA
sd_dat$gender <- as.numeric(as.factor(sd_dat$gender))
sd_dat$gender <- ifelse(sd_dat$gender > 2, NA, sd_dat$gender)
# state: "prefer not to answer" becomes NA, the rest get alphabetical level codes
sd_dat$state <- ifelse(sd_dat$state == "Prefiero no responder", NA, sd_dat$state)
sd_dat$state <- as.numeric(as.factor(sd_dat$state))
sd_dat <- sd_dat %>% mutate(across(c(age_cat, gender, state), as.factor))
demog_vars <- c("age_cat", "gender", "state")
sapply(sd_dat[, demog_vars], table, useNA = "ifany")

# trim dataset to remove cases with NAs for demogs
# All three raking variables must be complete: the recodes above create NAs in
# state (and possibly age_cat) as well as gender, and the old filter dropped
# only the gender NAs.
sd_dat_t <- sd_dat[complete.cases(sd_dat[, demog_vars]), ]

# population data
# Each tibble holds the target distribution (Freq, in percent) for one raking
# variable; the level codes must match the factor levels created above.
# NOTE(review): the state margin assumes exactly 23 levels in alphabetical
# order - check against the table printed above.
pop_margins <- list(
  tibble(gender = as.factor(c(1, 2)), Freq = c(49, 51)),
  tibble(age_cat = as.factor(1:6), Freq = c(17, 23, 20, 16, 11, 12)),
  tibble(state = as.factor(1:23), Freq =
           c(1.2, 3.4, 1.3, 4.4, 2.0, 6.2, 3.0, 4.4, 1.5, 2.5, 2.3, 4.5, 6.5,
             3.7, 33.1, 3.0, 0.4, 8.1, 5.0, 0.2, 1.0, 0.7, 1.4))
)

# raking
survey_weights <- rake_survey(
  .data = sd_dat_t,
  pop_margins = pop_margins,
  scale_to_n = TRUE
)

# trim
# Only the trimmed weights are stored; the untrimmed ones stay available in
# survey_weights.
sd_dat_t$wt_new <- trim_weights(survey_weights)
summary(sd_dat_t$wt_new)


## Reliability and dimensionality
## Each estimate is computed once, printed, and then written to file.

# alpha, from the pairwise-complete Pearson correlations of the 17 items
sd_cor <- cor(sd17, use = "pairwise.complete.obs")
alph_out <- psych::alpha(sd_cor)
alph_out
# first component of the alpha output
write.csv(alph_out[[1]], file = "sd_alpha.csv")

# eigenvalues of principal components
sd_eigen <- eigen(sd_cor)$values
sd_eigen
write.csv(sd_eigen, file = "sd_eigen.csv")

# 1-factor EFA
efa1 <- psych::fa(sd17, nfactors = 1)
efa1
# convert the LaTeX loading table to HTML
efa_html <- tth(fa2latex(efa1))
writeLines(efa_html, "sd_efa1.html")

# ordinal: same 1-factor EFA on the polychoric correlation matrix
sd_pcor <- polychoric(sd17)$rho
write.csv(sd_pcor, "sd_pcormat.csv", row.names = TRUE)
# a correlation matrix carries no sample size, so pass the number of
# respondents in the item data explicitly
efa_ord <- psych::fa(sd_pcor, nfactors = 1, n.obs = nrow(sd17))
efa_ord
efa_ord_html <- tth(fa2latex(efa_ord))
writeLines(efa_ord_html, "sd_efa1_ord.html")


## CFA models
## Naming: *_std = weighted MLR fit on the raked sample (sd_dat_t),
##         *_ord = unweighted WLSMV fit on the full recoded sample (sd_dat_r).
## Each fit has its own object so that the ordinal models no longer overwrite
## the MLR models that the LR test at the end compares.

# liberal democracy factor with orthogonal methods factor
# SupLD loads on all 17 items; PosVal loads on the eight items listed in
# dem_vnam and is uncorrelated with SupLD (orthogonal=TRUE).

cfa_mod_1 <- '
SupLD =~ FREXP1 + FREXP2 + FRASSC1 + FRASSC2 + FRASSC3 + UNISUFF1 + UNISUFF2 
          + DECELEC1 + DECELEC2 + FRELECT1 + FRELECT2 + JUDCNSTR1 + JUDCNSTR2 
          + LEGCNSTR1 + LEGCNSTR2 + EQLAW1 + EQLAW2
PosVal =~ FREXP1 + FRASSC2 + UNISUFF2 + DECELEC2 + FRELECT1 + JUDCNSTR1 + LEGCNSTR2 + EQLAW2
'
sd_cfa_1_std <- cfa(cfa_mod_1, data = sd_dat_t, estimator = "MLR", orthogonal = TRUE,
                    std.lv = TRUE, sampling.weights = "wt_new")
semTable(sd_cfa_1_std, paramSets = c("fits", "loadings", "latentvariances", "latentcovariances"),
         fits = c("chisq", "cfi", "rmsea", "srmr"), columns = c("est", "se", "p"),
         type = "html", file = "cfa_1fac_std_table_w.html")
sd_cfa_1_std_fit <- fitMeasures(sd_cfa_1_std, output = "matrix",
                                fit.measures = c("cfi", "cfi.robust", "rmsea", "rmsea.robust", "srmr"))
write.csv(sd_cfa_1_std_fit, file = "cfa_1fac_std_fit_w.csv", row.names = TRUE)

# liberal democracy factor with orthogonal methods factor, ordinal

sd_cfa_1_ord <- cfa(cfa_mod_1, data = sd_dat_r, estimator = "WLSMV", orthogonal = TRUE,
                    ordered = TRUE, std.lv = TRUE)
summary(sd_cfa_1_ord, fit.measures = TRUE)
semTable(sd_cfa_1_ord, paramSets = c("fits", "loadings", "latentvariances", "latentcovariances"),
         fits = c("chisq", "cfi", "rmsea", "srmr"), columns = c("est", "se", "p"),
         type = "html", file = "cfa_1fac_ord_std_table.html")
sd_cfa_1_ord_fit <- fitMeasures(sd_cfa_1_ord, output = "matrix",
                                fit.measures = c("cfi", "cfi.robust", "rmsea", "rmsea.robust", "srmr"))
write.csv(sd_cfa_1_ord_fit, file = "cfa_1fac_std_ord_fit.csv", row.names = TRUE)

# electoral democracy and rule of laws factors with orthogonal methods factor
# PosVal is fixed to be uncorrelated with both substantive factors; the
# SupED-SupRL covariance is free, with 0.8 as its starting value.

cfa_mod_2 <- '
SupED =~ FREXP1 + FREXP2 + FRASSC1 + FRASSC2 + FRASSC3 + UNISUFF1 + UNISUFF2 
          + DECELEC1 + DECELEC2 + FRELECT1 + FRELECT2
SupRL =~  JUDCNSTR1 + JUDCNSTR2 + LEGCNSTR1 + LEGCNSTR2 + EQLAW1 + EQLAW2
PosVal =~ FREXP1 + FRASSC2 + UNISUFF2 + DECELEC2 + FRELECT1 + JUDCNSTR1 + LEGCNSTR2 + EQLAW2
PosVal ~~ 0*SupED
PosVal ~~ 0*SupRL
SupED ~~ start(0.8)*SupRL
'
sd_cfa_2_std <- cfa(cfa_mod_2, data = sd_dat_t, estimator = "MLR", std.lv = TRUE,
                    sampling.weights = "wt_new")
summary(sd_cfa_2_std, fit.measures = TRUE)
semTable(sd_cfa_2_std, paramSets = c("fits", "loadings", "latentvariances", "latentcovariances"),
         fits = c("chisq", "cfi", "rmsea", "srmr"), columns = c("est", "se", "p"),
         type = "html", file = "cfa_2fac_std_table_w.html")
sd_cfa_2_std_fit <- fitMeasures(sd_cfa_2_std, output = "matrix",
                                fit.measures = c("cfi", "cfi.robust", "rmsea", "rmsea.robust", "srmr"))
write.csv(sd_cfa_2_std_fit, file = "cfa_2fac_std_fit_w.csv", row.names = TRUE)

# electoral democracy and rule of laws factors with orthogonal methods factor, ordinal
# Fitted to sd_dat_r like the other unweighted ordinal models (this call used
# sd_dat_t before, so the two ordinal fits were based on different samples).

sd_cfa_2_ord <- cfa(cfa_mod_2, data = sd_dat_r, estimator = "WLSMV", ordered = TRUE,
                    std.lv = TRUE)
summary(sd_cfa_2_ord, fit.measures = TRUE)
semTable(sd_cfa_2_ord, paramSets = c("fits", "loadings", "latentvariances", "latentcovariances"),
         fits = c("chisq", "cfi", "rmsea", "srmr"), columns = c("est", "se", "p"),
         type = "html", file = "cfa_2fac_ord_std_table.html")
sd_cfa_2_ord_fit <- fitMeasures(sd_cfa_2_ord, output = "matrix",
                                fit.measures = c("cfi", "cfi.robust", "rmsea", "rmsea.robust", "srmr"))
write.csv(sd_cfa_2_ord_fit, file = "cfa_2fac_std_ord_fit.csv", row.names = TRUE)

# lr test - 1 v 2-fac models
# Compares the two weighted MLR fits, which share the same data (sd_dat_t) and
# estimator. Previously both names pointed at the ordinal fits, which had been
# estimated on different data sets.

lr_1v2 <- lavTestLRT(sd_cfa_1_std, sd_cfa_2_std, method = "satorra.bentler.2010")
lr_1v2
write.csv(lr_1v2, file = "lrtest_cfas.csv", row.names = TRUE)


## Trimmed 7-item scale

# the seven retained items, taken from the recoded item data
sd_dat_7 <- sd_dat_r[, c(
  "FREXP2", "FRASSC1", "UNISUFF1", "FRELECT2",
  "JUDCNSTR2", "LEGCNSTR1", "EQLAW1"
)]

# 1-factor CFA (MLR, weighted with wt_new)

cfa_mod_7i <- 'SupLD =~ FREXP2 + FRASSC1 + UNISUFF1 + FRELECT2 + JUDCNSTR2 + LEGCNSTR1 + EQLAW1'

sd_cfa_7i_std <- cfa(
  cfa_mod_7i,
  data = sd_dat_t,
  estimator = "MLR",
  std.lv = TRUE,
  sampling.weights = "wt_new"
)
summary(sd_cfa_7i_std, fit.measures = TRUE)
semTable(
  sd_cfa_7i_std,
  paramSets = c("fits", "loadings", "latentvariances", "latentcovariances"),
  fits = c("chisq", "cfi", "rmsea", "srmr"),
  columns = c("est", "se", "p"),
  type = "html",
  file = "cfa_7it_std_table_w.html"
)
sd_cfa_7i_fit <- fitMeasures(
  sd_cfa_7i_std,
  fit.measures = c("cfi", "cfi.robust", "rmsea", "rmsea.robust", "srmr"),
  output = "matrix"
)
write.csv(sd_cfa_7i_fit, file = "cfa_7it_std_fit_w.csv", row.names = TRUE)

# 1-factor CFA, ordinal (WLSMV, no weights)

sd_cfa_7i_ord <- cfa(
  cfa_mod_7i,
  data = sd_dat_r,
  estimator = "WLSMV",
  ordered = TRUE,
  std.lv = TRUE
)
summary(sd_cfa_7i_ord, fit.measures = TRUE)
semTable(
  sd_cfa_7i_ord,
  paramSets = c("fits", "loadings", "latentvariances", "latentcovariances"),
  fits = c("chisq", "cfi", "rmsea", "srmr"),
  columns = c("est", "se", "p"),
  type = "html",
  file = "cfa_7it_ord_std_table.html"
)
# the fit-measure object is reused; the MLR values were already written above
sd_cfa_7i_fit <- fitMeasures(
  sd_cfa_7i_ord,
  fit.measures = c("cfi", "cfi.robust", "rmsea", "rmsea.robust", "srmr"),
  output = "matrix"
)
write.csv(sd_cfa_7i_fit, file = "cfa_7it_ord_std_fit.csv", row.names = TRUE)

# eigenvalues of 7-item scale
# (computed once from the pairwise-complete Pearson correlations, then printed
# and written to file)
sd7_cor <- cor(sd_dat_7, use = "pairwise.complete.obs")
sd7_eigen <- eigen(sd7_cor)$values
sd7_eigen
write.csv(sd7_eigen, file = "sd7_eigen.csv")

# reliability of 7-item scale
alph7_out <- psych::alpha(sd7_cor)
alph7_out
# first component of the alpha output
write.csv(alph7_out[[1]], file = "sd7_alpha.csv")
# ordinal reliability: alpha from the polychoric correlation matrix
sd7_pcor <- polychoric(sd_dat_7)$rho
alph7_ord_out <- psych::alpha(sd7_pcor)
write.csv(alph7_ord_out[[1]], file = "sd7_ord_alpha.csv")

# create additive 7-item scale
# mean of the seven recoded items; a respondent with any missing item gets NA
# because rowMeans() does not drop NAs by default
sd_dat$SUPDEM_7IT <- rowMeans(sd_dat_7)


## Correlations with criterion variables

# Left-right / lib-cons ideology - LR_IDEOL or LC_IDEOL: recode such that right / conservative is high
# check the raw codes first, then set the 99 code to missing
table(sd_dat$ideo_1, useNA = "ifany")
sd_dat[["LR_IDEOL"]] <- car::recode(sd_dat[["ideo_1"]], recodes = "99=NA")

# correlation between the 7-item scale and ideology
cor_items <- c("SUPDEM_7IT", "LR_IDEOL")
crit_cor <- mixedCor(data = sd_dat[cor_items])$rho
write.csv(crit_cor, file = "sd7_crit_cormat.csv")
